** Load the estimation data set (generated in do-file data05)
use "$data\minwage_data5_estimation_2012_2015.dta", clear

* Sample restrictions, applied in a single pass:
*   - 49 event-month observation window (tline from -24 to 24)
*   - apprentices excluded
*   - conditional on employment
*   - year 2015 only, which is the year used for employment fractions and hourly wages
keep if inrange(tline, -24, 24) & apprentice == 0 & emplyd == 1 & indkaar == 2015

** Derive age from event time
* The month in which individuals turn 18 (tline == 0) is dropped, because the
* hourly wage in that month can be either the youth or the adult wage.
drop if tline == 0

* Each 12-month band of event time maps to one year of age; anything outside
* the four bands is left missing.
generate ealder = cond(inrange(tline, -24, -13), 16, ///
                  cond(inrange(tline, -12,  -1), 17, ///
                  cond(inrange(tline,   1,  12), 18, ///
                  cond(inrange(tline,  13,  24), 19, .))))

* Dummy equal to 1 for young workers (16-17 years old), 0 otherwise
generate ealder1617 = inrange(tline, -24, -1)

** Generating the sector variable from the first 4 characters of SKAT's hierarchical sector variable: es7606_hov_br_nr
* Steps: convert the code to a string, keep its first 4 characters in a new
* variable, swap that variable in under the original name, and convert it back
* to numeric. The result replaces es7606_hov_br_nr with its 4-character prefix.
* NOTE(review): if the codes are stored numerically, any leading zeros are
* already absent before the truncation, so the prefix would be taken from a
* shorter string - confirm against the raw data. The force option also
* suppresses tostring's refusal to convert when information would be lost.
tostring es7606_hov_br_nr, replace force
ge es7606_sub4 = substr(es7606_hov_br_nr,1, 4)
drop es7606_hov_br_nr
rename es7606_sub4 es7606_hov_br_nr
destring es7606_hov_br_nr, replace

** Finding the top 20 most common sectors for youth employment
* Works on a temporary copy of the data (preserve/restore) and saves a
* sector-level file with one row per top-20 sector, containing:
*   top20_OBS         number of youth observations in the sector
*   top20_emply_frac  sector share (in percent) of all youth observations,
*                     including those without sector information
*   es7606_rank_2015  rank of the sector (1 = most common)
*   top20             indicator equal to 1 for every saved sector
preserve
	keep if inrange(ealder, 16, 17) & emplyd == 1 // keep employed youth (16-17 years old)
	replace es7606_hov_br_nr = 0 if es7606_hov_br_nr==. // code 0 collects observations without sector information

	collapse (count) observations = pnr ///
		, by(es7606_hov_br_nr)

	* Total is taken before the no-information group is dropped, so the
	* shares are relative to all youth observations.
	sum observations
	global tot = r(sum)
	gen emply_frac  = (observations / $tot)*100
	drop if es7606_hov_br_nr == 0

	* Rank on the exact observation count (emply_frac is a rounded float of the
	* same quantity) and break ties on the sector code, so that both the ranking
	* and the top-20 cut-off are reproducible from run to run.
	gsort -observations es7606_hov_br_nr
	drop if _n > 20
	ge es7606_rank_2015 = _n
	ge top20=1
	rename observations top20_OBS
	rename emply_frac top20_emply_frac
	** data on the 20 most common sectors for youth employment
	save "$data\minwage_descrp03_EStop20.dta", replace
restore

** Attach the top-20 sector information to the main data set
merge m:1 es7606_hov_br_nr using "$data\minwage_descrp03_EStop20.dta", nogenerate

* ES15 holds the sector code for top-20 sectors and 0 for all other sectors
* (those for which top20 is missing after the merge).
generate ES15 = cond(top20 == 1, es7606_hov_br_nr, cond(top20 == ., 0, .))

** Age-specific hourly wage variables: equal to calc_hrly_wage within the age group, missing otherwise
generate calc_hrly_wage1617 = calc_hrly_wage if inrange(ealder, 16, 17)
forvalues age = 16/18 {
	generate calc_hrly_wage`age' = calc_hrly_wage if ealder == `age'
}

** Age-specific employment variables: equal to emplyd within the age group, missing otherwise
generate emplyd_1617 = emplyd if inrange(ealder, 16, 17)
forvalues age = 16/18 {
	generate emplyd_`age' = emplyd if ealder == `age'
}

* Drop observations beyond event time 12, i.e. individuals who have turned 19
keep if inrange(ealder, 16, 18)

** Table A.2: Variation in Average Hourly Wage by Sector
* One row per ES15 group (top-20 sector codes, plus 0 for all other sectors):
*   observations*     counts of non-missing values (all, ages 16-17, 17, 18)
*   avg_hrly_wage17   mean hourly wage at age 17
*   avg_hrly_wage18   mean hourly wage at age 18
*   mean_diff_pct     age 18 vs. age 17 wage gap relative to the midpoint of the two means, in percent
*   emply_frac        group share (in percent) of all 16-17 year-old observations
preserve
	collapse (count) observations = pnr ///
		 (count) observations1617 = emplyd_1617 ///
		 (count) observations17 = emplyd_17 (count) observations18 = emplyd_18 ///
		 (mean) avg_hrly_wage17 = calc_hrly_wage17 ///
		 (mean) avg_hrly_wage18 = calc_hrly_wage18 ///
		, by(ES15)

	sort ES15
	* Show empty cells as missing rather than as zero counts
	replace observations1617 = . if observations1617 == 0
	replace observations17 = . if observations17 == 0
	replace observations18 = . if observations18 == 0
	ge mean_diff_pct = (avg_hrly_wage18-avg_hrly_wage17) / (0.5*(avg_hrly_wage17+avg_hrly_wage18)) * 100

	qui sum observations1617
	global tot = r(sum)
	gen emply_frac  = (observations1617 / $tot)*100
	drop if ES15 == .
	order ES15 observations* avg_hrly_wage* mean_diff_pct
	gsort - observations1617
	* The file name carries the literal suffix "sub4" (sector codes are cut to
	* 4 digits). It previously relied on a local macro for which no definition
	* is visible in this do-file; an undefined local expands to nothing, which
	* left the suffix as a bare "sub".
	export excel using "$out\Apndx_Table2_excl_apprentices_avg_hrly_wage_ES15_sub4_dofile_descrp03.xls", replace firstrow(variables)
restore

** Table 1, Panel B: computed from data (monthly earnings/hours)
* Pools the two supermarket sector codes and writes a single summary row with
* observation counts, mean and median hourly wages at ages 17 and 18, and the
* age 18 vs. age 17 gap relative to the midpoint of the two values, in percent.
preserve
	keep if inlist(ES15, 4711, 4719) // supermarket sector codes

	* Without by(), collapse returns one row computed over all remaining observations
	collapse (count)  observations     = pnr              ///
	                  observations1617 = emplyd_1617      ///
	                  observations17   = emplyd_17        ///
	                  observations18   = emplyd_18        ///
	         (mean)   avg_hrly_wage17  = calc_hrly_wage17 ///
	                  avg_hrly_wage18  = calc_hrly_wage18 ///
	         (median) p50_hrly_wage17  = calc_hrly_wage17 ///
	                  p50_hrly_wage18  = calc_hrly_wage18

	* Show empty cells as missing rather than as zero counts
	foreach age in 17 18 {
		replace observations`age' = . if observations`age' == 0
	}

	generate mean_diff_pct = (avg_hrly_wage18 - avg_hrly_wage17) / (0.5 * (avg_hrly_wage17 + avg_hrly_wage18)) * 100
	generate p50_diff_pct = (p50_hrly_wage18 - p50_hrly_wage17) / (0.5 * (p50_hrly_wage17 + p50_hrly_wage18)) * 100

	order observations* avg_hrly_wage* mean_diff_pct*
	gsort -observations1617
	export excel using "$out\Table1b_excl_apprentices_supermarket_4711_4719_excl_apprentices_avg_hrly_wage_ES15_sub4_dofile_descrp03.xls", replace firstrow(variables)
restore
